* Session setup: empty the data in memory and turn off output paging.
clear
set more off

* Directory globals (UNC paths) referenced by the use/save commands of this do-file:
*   ecrmen  - folder holding menages_2008_2020.dta
*   ecrind  - folder holding individus_2008_2020.dta
*   ecrfin  - folder receiving the merged and derived tables
*   ecrprov - folder receiving the intermediate part1/part2 tables
global ecrmen  "\\intra\partages\au_amic2\SRCV_AVRIL2021\BASES_FEV2022_V5\2020\MENAGES"
global ecrind  "\\intra\partages\au_amic2\SRCV_AVRIL2021\BASES_FEV2022_V5\2020\INDIVIDUS"
global ecrfin  "\\intra\partages\au_amic2\SRCV_AVRIL2021\BASES_FEV2022_V5\2020\TOTAL"
global ecrprov "\\intra\partages\au_amic2\SRCV_AVRIL2021\prov_FEV2022_V5" 


/****************************************************************************************/
/* I - MERGING THE INDIVIDUAL AND HOUSEHOLD TABLES                                      */
/****************************************************************************************/
/******************************************/
/* A - 2008-2019, keyed by idmenc/annee_SRCV */
/******************************************/
* Scratch extracts are kept in Stata temporary files (removed automatically when
* the do-file ends) instead of being written to, then erased from, the source
* data folders.
tempfile ind_prov men_prov

* Individual table, every wave except 2020. The variables that exist in both
* tables are suffixed so that both versions survive the merge.
use "$ecrind\individus_2008_2020.dta" if annee_SRCV != 2020, clear
rename men           men_from_individu
rename ssech         ssech_from_individu
rename indic_replace indic_replace_from_individu
save "`ind_prov'"

* Household table, same waves, same renaming convention.
use "$ecrmen\menages_2008_2020.dta" if annee_SRCV != 2020, clear
rename men           men_from_menage
rename ssech         ssech_from_menage
rename indic_replace indic_replace_from_menage
* Diagnostic only: shows whether the merge key is unique on the household side.
duplicates report idmenc annee_SRCV
save "`men_prov'"

* Attach household variables to each individual.
* NOTE(review): merge m:m pairs observations by position within each key group,
* which is only meaningful if the household side is unique on idmenc/annee_SRCV.
* It is kept here because that uniqueness cannot be confirmed from this file;
* if the duplicates report above shows no surplus, switch to merge m:1.
use "`ind_prov'", clear
merge m:m idmenc annee_SRCV using "`men_prov'"
sort idmenc annee_SRCV
drop _merge
order idmenc annee_SRCV men_from_individu men_from_menage
save "$ecrprov\ind_men_2008_2020_part1.dta", replace


/*********************************************/
/* B - 2020, keyed by ident_men_ue/annee_SRCV */
/*********************************************/
* Scratch extracts are kept in Stata temporary files (removed automatically when
* the do-file ends) instead of being written to, then erased from, the source
* data folders.
tempfile ind_prov men_prov

* Individual table, 2020 wave only. rb040 is converted to numeric and becomes
* the household key ident_men_ue.
use "$ecrind\individus_2008_2020.dta" if annee_SRCV == 2020, clear
rename men           men_from_individu
rename ssech         ssech_from_individu
destring rb040, replace
rename rb040         ident_men_ue
rename indic_replace indic_replace_from_individu
save "`ind_prov'"

* Household table, 2020 wave only. db030 becomes the household key ident_men_ue.
use "$ecrmen\menages_2008_2020.dta" if annee_SRCV == 2020, clear
rename men           men_from_menage
rename ssech         ssech_from_menage
rename db030         ident_men_ue
rename indic_replace indic_replace_from_menage
* Diagnostic only: shows whether the merge key is unique on the household side.
duplicates report ident_men_ue annee_SRCV
save "`men_prov'"

* Attach household variables to each individual.
* NOTE(review): merge m:m pairs observations by position within each key group,
* which is only meaningful if the household side is unique on
* ident_men_ue/annee_SRCV. It is kept here because that uniqueness cannot be
* confirmed from this file; if the duplicates report above shows no surplus,
* switch to merge m:1.
use "`ind_prov'", clear
merge m:m ident_men_ue annee_SRCV using "`men_prov'"
drop _merge
sort ident_men_ue annee_SRCV
order ident_men_ue annee_SRCV men_from_individu men_from_menage
save "$ecrprov\ind_men_2008_2020_part2.dta", replace


/*******************************************************************************/
/* C - FOR 2020, KEEP ONLY THE 11 906 INDIVIDUALS THAT ALREADY EXISTED BEFORE  */
/* D - THEN REMOVE THE 82 INDIVIDUALS WITHOUT men_from_menage                  */
/*******************************************************************************/
* Both filters are applied to the 2020 merged table, which is overwritten in place.
* The figures 11 906 and 82 are the counts recorded by the original author.
use "$ecrprov\ind_men_2008_2020_part2.dta", clear

* C - keep only the observations whose ind is filled in
keep if ind != ""
count /* 11 906  */

* D - keep only the observations whose men_from_menage is filled in
keep if men_from_menage != ""
save "$ecrprov\ind_men_2008_2020_part2.dta", replace

/***************************************/
/* E - STACKING 2008-2019 WITH 2020    */
/***************************************/
* Stack the two merged tables, then put the identifying variables first and
* sort on them before writing the combined table.
use "$ecrprov\ind_men_2008_2020_part1.dta", clear
append using "$ecrprov\ind_men_2008_2020_part2.dta"
sort men_from_menage ind annee_SRCV
order men_from_menage ind annee_SRCV
save "$ecrfin\ind_men_2008_2020_def.dta", replace


/**************************************************************************************************/
/* II - REPLACING MISSING VALUES (".") BY ZERO IN THE INCOME VARIABLES LISTED BELOW               */
/*      (the list includes HY020, which the original header says is used for the comparison       */
/*      in program 4)                                                                             */
/**************************************************************************************************/
* Only the system missing value "." is recoded; the table is overwritten in place.
local income_vars PY100N PY110N PY010N PY050N PY090N PY120N PY130N PY140N  ///
                  HY040N HY050N HY060N HY070N HY080N HY090N HY100N HY110N  ///
                  HY120N HY130N HY145N HY020 IRPP TH PY021G PY080N PY080G

use "$ecrfin\ind_men_2008_2020_def.dta", clear
foreach v of varlist `income_vars' {
    replace `v' = 0 if `v' == .
}
save "$ecrfin\ind_men_2008_2020_def.dta", replace



/************************************************************************/
/* III - APPLYING THE CONDITION ADULTVRAIREP == 1                       */
/*      SO THAT THE TABLE ONLY KEEPS INDIVIDUALS OVER 16 WHO            */
/*      ANSWERED THE QUESTIONNAIRE                                      */
/*      THE EXCLUDED INDIVIDUALS ARE STORED IN A SEPARATE TABLE         */
/*      FOR THE FINAL COMPARISON                                        */
/************************************************************************/
* 1 - flag the observations to exclude:
*       waves up to 2019 : adultvrairep == 0
*       wave 2020        : age < 16 (adultvrairep is not used for that wave)
use "$ecrfin\ind_men_2008_2020_def.dta", clear
gen indi_supp = (annee_SRCV <= 2019 & adultvrairep == 0) | (annee_SRCV == 2020 & age < 16)
tab indi_supp, missing
order annee_SRCV adultvrairep age indi_supp
save "$ecrfin\ind_men_2008_2020_def.dta", replace

* restricted table: flagged observations removed
preserve
keep if indi_supp == 0
save "$ecrfin\ind_men_2008_2020_restrict.dta", replace
restore

* 2 - flagged observations only, reduced to identifiers and individual income
*     variables, stored separately for the comparison
keep if indi_supp == 1
keep idmenc annee_SRCV ident_men_ue men_from_menage ind rb030 rb040 db030  ///
     adultvrairep age idindc                                               ///
     PY010N PY050N PY100N PY090N PY110N PY130N PY120N PY140N PY021G PY080N PY080G
save "$ecrfin\adultvrairep_a_zero.dta", replace



/************************************************************************/
/* IV - NUMBER OF ADULTS (16+) IN EACH HOUSEHOLD                        */
/*      BY IDINDC / IDMENC                                              */
/************************************************************************/

/*********************************************/
/* A - 2008-2019, keyed by idmenc/annee_SRCV */
/*********************************************/
use "$ecrfin\ind_men_2008_2020_restrict.dta" if annee_SRCV != 2020, clear
count if idindc == ""

* 1 - number of observations per idmenc/annee_SRCV, written to its own table
preserve
gen compteur = 1
collapse (count) nb_idindc = compteur, by(idmenc annee_SRCV)
save "$ecrfin\nb_idindc_par_idmenc.dta", replace
restore

* 2 - bring that number back onto every individual observation.
*     The using table comes from the collapse above, so it has exactly one row
*     per key and every master observation has a match: m:1 with assert(match)
*     states both facts and stops the run if either is ever violated.
count
merge m:1 idmenc annee_SRCV using "$ecrfin\nb_idindc_par_idmenc.dta", assert(match) nogenerate
sort idmenc annee_SRCV idindc
order idmenc annee_SRCV idindc men_from_menage ind nb_idindc
label variable nb_idindc			"Nombre d'individus (idindc) par menage annee_SRCV (idmenc) - calcul SAMIC"
save "$ecrfin\ind_men_2008_2020_nb1.dta", replace

/*********************************************/
/* B - 2020, keyed by ident_men_ue/annee_SRCV */
/*********************************************/
use "$ecrfin\ind_men_2008_2020_restrict.dta" if annee_SRCV == 2020, clear
count

* 1 - number of observations per ident_men_ue/annee_SRCV, written to its own table.
*     rb030 is converted to numeric only inside this preserved section, for the
*     two checks below; the restored data keeps rb030 as stored in the file.
preserve
destring rb030, replace
count if rb030 == .        /* 0 */
count if ident_men_ue == . /* 0 */
gen compteur = 1
collapse (count) nb_rb030 = compteur, by(ident_men_ue annee_SRCV)
save "$ecrfin\nb_rb030_par_identmenue.dta", replace
restore

* 2 - bring that number back onto every individual observation.
*     The using table comes from the collapse above, so it has exactly one row
*     per key and every master observation has a match: m:1 with assert(match)
*     states both facts and stops the run if either is ever violated.
count
merge m:1 ident_men_ue annee_SRCV using "$ecrfin\nb_rb030_par_identmenue.dta", assert(match) nogenerate
sort ident_men_ue annee_SRCV rb030
order ident_men_ue annee_SRCV rb030 nb_rb030
label variable nb_rb030			"Nombre d'individus (rb030) par menage annee_SRCV (ident_men_ue) - calcul SAMIC"
save "$ecrfin\ind_men_2008_2020_nb2.dta", replace

/***************************************/
/* C - STACKING 2008-2019 WITH 2020    */
/***************************************/
* Stack the two tables and build a single household-size variable:
* nb_rb030 for the 2020 wave, nb_idindc for every other wave.
use "$ecrfin\ind_men_2008_2020_nb1.dta", clear
append using "$ecrfin\ind_men_2008_2020_nb2.dta"
count /* 258 194 obs. and 48 var. at this point, as recorded by the original author */

gen nb_ind_par_men = cond(annee_SRCV == 2020, nb_rb030, nb_idindc)

order men_from_menage ind annee_SRCV men_from_individu age rb040 db030 ident_men_ue rb030 nb_rb030 nb_idindc nb_ind_par_men
sort men_from_menage ind annee_SRCV
save "$ecrfin\ind_men_2008_2020_NB.dta", replace


/******************************************************************************************/
/* V - HOUSEHOLD (H) VARIABLES:                                                           */
/*      PROPOSAL 1 : DIVIDE BY THE NUMBER OF INDIVIDUALS IN THE HOUSEHOLD                 */
/*      PROPOSAL 2 - VARIANT : SPLIT EACH AMOUNT ACCORDING TO THE WEIGHT OF EACH          */
/*                             INDIVIDUAL'S LABOUR_INCOME WITHIN THE HOUSEHOLD            */
/******************************************************************************************/
/************************/
/* A - PROPOSAL 1       */
/************************/
local hh_vars HY040N HY050N HY060N HY070N HY080N HY090N HY100N HY110N HY120N HY130N HY145N IRPP TH

use "$ecrfin\ind_men_2008_2020_NB.dta", clear
count /* 258 194 obs. and 62 var, as recorded by the original author */

* 1 - per-individual household variables: each amount divided by nb_ind_par_men
foreach hv of varlist `hh_vars' {
    gen P_`hv' = `hv'/nb_ind_par_men
}

* 2 - labels and creation of year
label variable P_HY040N		"Revenu net tire de la location de propriete ou terres"
label variable P_HY050N		"Montant net redresse des alloc. famille/enfants"
label variable P_HY060N		"Montant net des alloc. d'exclusion sociale non classées ailleurs"
label variable P_HY070N		"Montant net redresse des aides au logement"
label variable P_HY080N		"Transferts nets percus entre menages (loyers, pensions)"
label variable P_HY090N		"Montant annuel net interets, div., benef. tires d'invest. en capital"
label variable P_HY100N		"Montant annuel net des remboursements interets hypothecaires"
label variable P_HY110N		"Revenus nets percus par personnes moins 16 ans"
label variable P_HY120N		"Montant net impot fortune"
label variable P_HY130N		"Montant net transfert inter-menages verses regulierement en especes"
label variable P_HY145N		"Rembours./encaissements lies a ajustement d'impot"
label variable P_IRPP		"Montant de l IR du ménage"
label variable P_TH 		"Montant de la taxe d habitation du menage"

* year is the survey wave minus one
gen year = annee_SRCV - 1
sort ind annee_SRCV
order ind annee_SRCV
save "$ecrfin\ind_men_2008_2020_fin1.dta", replace

/*********************************/
/* B - PROPOSAL 2 : VARIANT      */
/*********************************/
/******************/
/* B1 - 2008-2019 */
/******************/
* 1 - individual labour income LI and its total per idmenc/annee_SRCV.
*     The totals are written to their own table; the individual data stay in
*     memory through preserve/restore, so no scratch copy is written to disk.
use "$ecrfin\ind_men_2008_2020_fin1.dta" if annee_SRCV != 2020, clear
gen LI = PY010N + PY050N

preserve
collapse (sum) LI_idmenc_annee = LI, by(idmenc annee_SRCV)
save "$ecrfin\LI_idmenc_annee.dta", replace
restore

* 2 - bring the household total back onto every individual observation.
*     The using table comes from the collapse above (one row per key, every
*     master observation matched), hence m:1 with assert(match).
merge m:1 idmenc annee_SRCV using "$ecrfin\LI_idmenc_annee.dta", assert(match) nogenerate

* 3 - share_labour = LI / LI_idmenc_annee, then the 13 Pvar_ variables.
*     NOTE(review): when the household total is 0 the division yields a missing
*     share, so every Pvar_ variable is missing for that household. The count
*     below reports how many observations are affected; confirm that this is
*     the intended treatment.
count if LI_idmenc_annee == 0
gen share_labour = LI/LI_idmenc_annee
foreach hv of varlist HY040N HY050N HY060N HY070N HY080N HY090N HY100N HY110N HY120N HY130N HY145N IRPP TH {
    gen Pvar_`hv' = `hv' * share_labour
}
save "$ecrfin\ind_men_2008_2020_fin2.dta", replace

/******************/
/* B2 - 2020      */
/******************/
* 1 - individual labour income LI and its total per ident_men_ue/annee_SRCV.
*     The totals are written to their own table; the individual data stay in
*     memory through preserve/restore, so no scratch copy is written to disk.
use "$ecrfin\ind_men_2008_2020_fin1.dta" if annee_SRCV == 2020, clear
gen LI = PY010N + PY050N

preserve
collapse (sum) LI_identmenue_annee = LI, by(ident_men_ue annee_SRCV)
save "$ecrfin\LI_identmenue_annee.dta", replace
restore

* 2 - bring the household total back onto every individual observation.
*     The using table comes from the collapse above (one row per key, every
*     master observation matched), hence m:1 with assert(match).
merge m:1 ident_men_ue annee_SRCV using "$ecrfin\LI_identmenue_annee.dta", assert(match) nogenerate

* 3 - share_labour = LI / LI_identmenue_annee, then the 13 Pvar_ variables.
*     NOTE(review): when the household total is 0 the division yields a missing
*     share, so every Pvar_ variable is missing for that household. The count
*     below reports how many observations are affected; confirm that this is
*     the intended treatment.
count if LI_identmenue_annee == 0
gen share_labour = LI/LI_identmenue_annee
foreach hv of varlist HY040N HY050N HY060N HY070N HY080N HY090N HY100N HY110N HY120N HY130N HY145N IRPP TH {
    gen Pvar_`hv' = `hv' * share_labour
}
save "$ecrfin\ind_men_2008_2020_fin3.dta", replace


/******************/
/* B3 - 2008-2020 */
/******************/
* Stack the 2008-2019 and 2020 variant tables.
use "$ecrfin\ind_men_2008_2020_fin2.dta", clear
append using "$ecrfin\ind_men_2008_2020_fin3.dta"

/***********************/
/* C - TABLE CLEAN-UP  */
/***********************/
* 1 - remove the observations of two households (wave 2010) for which
*     men_from_menage differs from men_from_individu.
*     NOTE(review): these two idmenc values are hard-coded for this data
*     delivery; re-check them whenever the input tables change.
drop if idmenc == "003850" & annee_SRCV == 2010 & men_from_menage != men_from_individu
drop if idmenc == "032670" & annee_SRCV == 2010 & men_from_menage != men_from_individu

* 2 - drop men_from_individu and rename men_from_menage to men
drop men_from_individu
rename men_from_menage men
sort men ind annee_SRCV year
order men ind annee_SRCV year

* 3 - labels. Every Pvar_ label carries the "Variante-" prefix, including
*     Pvar_IRPP and Pvar_TH, so that they are not confused with the P_IRPP
*     and P_TH labels.
label variable Pvar_HY040N		"Variante-Revenu net tire de la location de propriete ou terres"
label variable Pvar_HY050N		"Variante-Montant net redresse des alloc. famille/enfants"
label variable Pvar_HY060N		"Variante-Montant net des alloc. d'exclusion sociale non classées ailleurs"
label variable Pvar_HY070N		"Variante-Montant net redresse des aides au logement"
label variable Pvar_HY080N		"Variante-Transferts nets percus entre menages (loyers, pensions)"
label variable Pvar_HY090N		"Variante-Montant annuel net interets, div., benef. tires d'invest. en capital"
label variable Pvar_HY100N		"Variante-Montant annuel net des remboursements interets hypothecaires"
label variable Pvar_HY110N		"Variante-Revenus nets percus par personnes moins 16 ans"
label variable Pvar_HY120N		"Variante-Montant net impot fortune"
label variable Pvar_HY130N		"Variante-Montant net transfert inter-menages verses regulierement en especes"
label variable Pvar_HY145N		"Variante-Rembours./encaissements lies a ajustement d'impot"
label variable Pvar_IRPP		"Variante-Montant de l IR du ménage"
label variable Pvar_TH 			"Variante-Montant de la taxe d habitation du menage"
label variable share_labour		"poids des revenus de l'individu dans le total des revenus du ménage"

* Final table ind_men_2008_2020_fin.dta
save "$ecrfin\ind_men_2008_2020_fin.dta", replace
